################################################################################
# Pooling the Polls - Netherlands
# Author: Tom Louwerse
# Based on code from Simon Jackman
# Date: 11 June 2012
# MAIN MODEL
# Last modified: 
# - 18/6/2017: Adapted for 2017 parliament
################################################################################


################################################################################
# PRELIMINARIES
################################################################################

# Read libraries
library(rjags)      # CRAN v4-10      
library(colorspace) # CRAN v2.0-0 
library(dplyr)      # CRAN v1.0.2      
library(car)        # CRAN v3.0-10        
library(foreach)    # CRAN v1.5.1
library(doParallel) # CRAN v1.0.16
library(rjags)      # CRAN v4-10
library(coda)

load("sourcedata.RData")


################################################################################
# RUN MODEL
################################################################################

# Function for running model with multiple house effects
runmodel_mhe = function(dat, margin=rep(0.0005,nrow(dat)), outcome=0,n.adapt=1e5,n.iter=5e5,thin=1e2,electionDate=as.Date("2017-03-15"),bugfile="Modellen/current_model.bug", 
                        start=NA,end=NA, endall=NA) {
  
  library(rjags)    # CRAN v4-10    
  library(coda)     # CRAN v4-10
  
  # Recode houseperiod, so that minimum is always 1
  dat$houseperiod <- as.integer(dat$houseperiod) - 
    min(as.integer(dat$houseperiod)) + 1
  
  # Start transition
  startTransition = 2
  if(!is.na(start)) {
    startTransition <- as.numeric(julian(start, origin=as.Date("2010-01-01")) - 
                                    julian(electionDate, origin=as.Date("2010-01-01")) + 1)
  }
  
  # End transition
  # For parties that stop existing during a term, transitions stop per the date of the last poll
  # but alpha will continue to be set at zero, for purposes of subsequent plotting etc
  if(!is.na(end)) {
    if(julian(end, origin=as.Date("2010-01-01")) > max(dat$datenum)) end = as.Date(max(dat$datenum), origin="2010-01-01")
    NPERIODS = length(julian(electionDate, origin=as.Date("2010-01-01")):julian(end, origin=as.Date("2010-01-01")))
    alpha=c(outcome, 
            rep(NA,length(julian(electionDate, origin=as.Date("2010-01-01")):
                            julian(end, origin=as.Date("2010-01-01"))-1)),
            rep(0,length((julian(end, origin=as.Date("2010-01-01"))+1):
                           julian(endall, origin=as.Date("2010-01-01"))-1)))
  } else {
    NPERIODS = length(julian(electionDate, origin=as.Date("2010-01-01")):max(dat$datenum))
    alpha=c(outcome, 
            rep(NA,length(julian(electionDate, 
                                 origin=as.Date("2010-01-01")):max(dat$datenum))-1))
  }
  
  
  
  foo <- list(y=dat$percentage,
              vari=dat$var,
              date=as.numeric(dat$datenum - 
                                julian(electionDate, 
                                       origin=as.Date("2010-01-01")) + 1),
              houseperiod=dat$houseperiod,
              org=as.integer(as.factor(as.character(dat$bedrijf))),
              margin=margin,
              NHOUSE=length(unique(dat$bedrijf)),
              NPOLLS=length(dat$percentage),
              NPERIODS=NPERIODS,
              NHOUSEPERIODS=length(unique(dat$houseperiod)),
              STARTTRANSITION=startTransition,
              alpha=alpha)
  
  
  # Define weights for pollsters
  house_names <- sort(unique(dat$bedrijf))
  house_weights <- matrix(1, foo$NHOUSE, foo$NHOUSEPERIODS)
  
  # Fix some issues with polling companies starting later or not polling in certain 'house periods'
  if("EenVandaag" %in% house_names & foo$NHOUSEPERIODS > 1) {
    io_which <- which(house_names == "EenVandaag")
    house_weights[io_which,2:foo$NHOUSEPERIODS] <- 0 # set weight for 1V (GfK) post-2018 to 0
  }
  if(foo$NHOUSEPERIODS == 3) {
    period3_which <- which(house_names %in% c("EenVandaag", "Peil"))
    house_weights[period3_which,3] <- 0 # set weight for other pollsters to 0 in period 3
  }
  
  
  house_weights <- house_weights / matrix(colSums(house_weights), 
                                          foo$NHOUSE, 
                                          foo$NHOUSEPERIODS, 
                                          byrow=TRUE)
  foo$house_weights <- house_weights
  print(house_weights)
  
  # Parties that were not polled before a certain date, are set to 0 beforehand
  if(!is.na(start)) {
    setToZero <- julian(electionDate, origin=as.Date("2010-01-01")):
      julian(start, origin=as.Date("2010-01-01")) -
      julian(electionDate, origin=as.Date("2010-01-01")) +1
    foo$alpha[setToZero]  <- 0
  }
  
  
  
  inits <- list(y2=dat$percentage)
  
  model = rjags::jags.model(bugfile, foo, n.adapt=n.adapt, inits=inits)
  
  est = rjags::coda.samples(model, c("alpha", "sigma", "house","pie"), n.iter=n.iter, thin=thin)
  return(est)
}

# Full run ----------------------------------------------------------------
cat("Start analysis\n")
flush.console()

workerCount= as.integer(Sys.getenv("NUMBER_OF_PROCESSORS")) - 1 
w <- makeCluster(workerCount)
registerDoParallel(w)

outputs <- foreach(i=1:length(sourcedata)) %dopar%
              runmodel_mhe(dat=sourcedata[[i]]$dat, 
                           sourcedata[[i]]$dat$margin,
                           outcome=sourcedata[[i]]$outcome, 
                           n.adapt=1e4,n.iter=3e5,thin=120, 
                           electionDate=as.Date("2017-03-15"),
                           start=sourcedata[[i]]$start,
                           end=sourcedata[[i]]$end,
                           endall=sourcedata[[i]]$endall,
                           bugfile=sourcedata[[i]]$model)
save(outputs, file="outputs.RData")


stopCluster(w)



# Basic results -----------------------------------------------------------

partylist <- sapply(sourcedata, function(q) q$partyname)

getData <- function(X) {
  alpha.pos <- grep("^alpha",colnames(X[[1]]))
  alpha.last.pos <- grep(paste("^alpha\\[", max(alpha.pos),"\\]", sep=""),
                         colnames(X[[1]]))
  x <- X[[1]][,alpha.last.pos]
  return(c(mean(x),quantile(x,c(0.025,.975))))
}

alldata <- foreach(i=1:length(outputs), .combine=rbind) %do% {
  getData(outputs[[i]])
}

rownames(alldata) <- partylist
colnames(alldata) <- c("Percentage", "Margin_Low", "Margin_High")

print(round(alldata * 100, 1))

